"""
This file is converted by MindConverter.

Use command `mindconverter --model_file bert_zh.onnx --shape 1,512 1,512 1,512  \
                           --input_nodes input_ids attention_mask token_type_ids  \
                           --output_nodes output_0 output_1`.
"""
import numpy as np
import mindspore
from mindspore import nn
from mindspore import Tensor, Parameter
from mindspore.ops import operations as P


class LayerNorm1(nn.Cell):
    """Layer normalization over the last axis with learnable scale and shift.

    Computes ``(x - mean) / sqrt(var + eps) * gamma + beta`` out of primitive
    ops, mirroring the ONNX subgraph this was converted from.  Attribute names
    are kept exactly as generated so parameter (checkpoint) keys stay stable.
    """

    def __init__(self):
        super(LayerNorm1, self).__init__()
        self.reducemean_0 = P.ReduceMean(keep_dims=True)
        self.reducemean_0_axis = -1
        self.sub_1 = P.Sub()
        self.pow_2 = P.Pow()
        self.pow_2_input_weight = 2.0
        self.reducemean_3 = P.ReduceMean(keep_dims=True)
        self.reducemean_3_axis = -1
        self.add_4 = P.Add()
        # Numerical-stability epsilon (float32 rounding of 1e-12).
        self.add_4_bias = 9.999999960041972e-13
        self.sqrt_5 = P.Sqrt()
        self.div_6 = P.Div()
        self.mul_7 = P.Mul()
        # gamma (scale) and beta (shift); randomly initialized by the converter.
        self.mul_7_w = Parameter(Tensor(np.random.uniform(0, 1, (768,)).astype(np.float32)), name=None)
        self.add_8 = P.Add()
        self.add_8_bias = Parameter(Tensor(np.random.uniform(0, 1, (768,)).astype(np.float32)), name=None)

    def construct(self, x):
        mean = self.reducemean_0(x, self.reducemean_0_axis)
        centered = self.sub_1(x, mean)
        squared = self.pow_2(centered, self.pow_2_input_weight)
        variance = self.reducemean_3(squared, self.reducemean_3_axis)
        stddev = self.sqrt_5(self.add_4(variance, self.add_4_bias))
        normalized = self.div_6(centered, stddev)
        return self.add_8(self.mul_7(normalized, self.mul_7_w), self.add_8_bias)


class MultiHeadAttn(nn.Cell):
    """Multi-head self-attention: 12 heads of size 64 over a 768-dim stream.

    ``construct(x, x0)`` takes the hidden states ``x`` and an additive
    attention bias ``x0`` (added to the raw scores before softmax).
    Attribute names are kept exactly as generated so parameter (checkpoint)
    keys stay stable.
    """

    def __init__(self):
        super(MultiHeadAttn, self).__init__()
        # Query / key / value projections (768 -> 768 each) plus biases.
        self.matmul_0 = nn.MatMul()
        self.matmul_0_w = Parameter(Tensor(np.random.uniform(0, 1, (768, 768)).astype(np.float32)), name=None)
        self.matmul_1 = nn.MatMul()
        self.matmul_1_w = Parameter(Tensor(np.random.uniform(0, 1, (768, 768)).astype(np.float32)), name=None)
        self.matmul_2 = nn.MatMul()
        self.matmul_2_w = Parameter(Tensor(np.random.uniform(0, 1, (768, 768)).astype(np.float32)), name=None)
        self.add_3 = P.Add()
        self.add_3_bias = Parameter(Tensor(np.random.uniform(0, 1, (768,)).astype(np.float32)), name=None)
        self.add_4 = P.Add()
        self.add_4_bias = Parameter(Tensor(np.random.uniform(0, 1, (768,)).astype(np.float32)), name=None)
        self.add_5 = P.Add()
        self.add_5_bias = Parameter(Tensor(np.random.uniform(0, 1, (768,)).astype(np.float32)), name=None)
        # Split 768 channels into 12 heads x 64 dims at sequence length 512.
        self.reshape_6 = P.Reshape()
        self.reshape_6_shape = tuple([-1, 512, 12, 64])
        self.reshape_7 = P.Reshape()
        self.reshape_7_shape = tuple([-1, 512, 12, 64])
        self.reshape_8 = P.Reshape()
        self.reshape_8_shape = tuple([-1, 512, 12, 64])
        self.transpose_9 = P.Transpose()
        self.transpose_10 = P.Transpose()
        self.transpose_11 = P.Transpose()
        self.matmul_12 = nn.MatMul()
        self.div_13 = P.Div()
        # Score scaling factor: sqrt(head_dim) = sqrt(64).
        self.div_13_w = 8.0
        self.add_14 = P.Add()
        self.softmax_15 = nn.Softmax(axis=3)
        self.matmul_16 = nn.MatMul()
        self.transpose_17 = P.Transpose()
        self.reshape_18 = P.Reshape()
        self.reshape_18_shape = tuple([-1, 512, 768])
        # Output projection.
        self.matmul_19 = nn.MatMul()
        self.matmul_19_w = Parameter(Tensor(np.random.uniform(0, 1, (768, 768)).astype(np.float32)), name=None)
        self.add_20 = P.Add()
        self.add_20_bias = Parameter(Tensor(np.random.uniform(0, 1, (768,)).astype(np.float32)), name=None)

    def construct(self, x, x0):
        # Linear projections into query / key / value.
        query = self.add_3(self.matmul_0(x, self.matmul_0_w), self.add_3_bias)
        key = self.add_4(self.matmul_1(x, self.matmul_1_w), self.add_4_bias)
        value = self.add_5(self.matmul_2(x, self.matmul_2_w), self.add_5_bias)
        # Reshape to (batch, 512, 12, 64), then move heads before sequence.
        query = self.reshape_6(query, self.reshape_6_shape)
        key = self.reshape_7(key, self.reshape_7_shape)
        value = self.reshape_8(value, self.reshape_8_shape)
        q_heads = self.transpose_9(query, (0, 2, 1, 3))   # (b, 12, 512, 64)
        k_heads = self.transpose_10(key, (0, 2, 3, 1))    # (b, 12, 64, 512)
        v_heads = self.transpose_11(value, (0, 2, 1, 3))  # (b, 12, 512, 64)
        # Scaled dot-product scores with the additive bias x0, then softmax.
        scores = self.div_13(self.matmul_12(q_heads, k_heads), self.div_13_w)
        probs = self.softmax_15(self.add_14(scores, x0))
        # Weighted sum of values, heads merged back into 768 channels.
        context = self.transpose_17(self.matmul_16(probs, v_heads), (0, 2, 1, 3))
        merged = self.reshape_18(context, self.reshape_18_shape)
        return self.add_20(self.matmul_19(merged, self.matmul_19_w), self.add_20_bias)


class Linear3(nn.Cell):
    """Affine transform ``x @ W + b`` with converter-initialized weights.

    Args:
        matmul_0_w_shape: shape of the weight matrix ``W``.
        add_1_bias_shape: shape of the bias vector ``b``.
    """

    def __init__(self, matmul_0_w_shape, add_1_bias_shape):
        super(Linear3, self).__init__()
        self.matmul_0 = nn.MatMul()
        self.matmul_0_w = Parameter(Tensor(np.random.uniform(0, 1, matmul_0_w_shape).astype(np.float32)), name=None)
        self.add_1 = P.Add()
        self.add_1_bias = Parameter(Tensor(np.random.uniform(0, 1, add_1_bias_shape).astype(np.float32)), name=None)

    def construct(self, x):
        projected = self.matmul_0(x, self.matmul_0_w)
        return self.add_1(projected, self.add_1_bias)


class GeLU1(nn.Cell):
    """Exact GeLU activation: ``0.5 * x * (1 + erf(x / sqrt(2)))``."""

    def __init__(self):
        super(GeLU1, self).__init__()
        self.div_0 = P.Div()
        # sqrt(2) as float32.
        self.div_0_w = 1.4142135381698608
        self.erf_1 = P.Erf()
        self.add_2 = P.Add()
        self.add_2_bias = 1.0
        self.mul_3 = P.Mul()
        self.mul_4 = P.Mul()
        self.mul_4_w = 0.5

    def construct(self, x):
        scaled = self.div_0(x, self.div_0_w)                    # x / sqrt(2)
        gate = self.add_2(self.erf_1(scaled), self.add_2_bias)  # erf(.) + 1
        return self.mul_4(self.mul_3(x, gate), self.mul_4_w)   # x * gate * 0.5


class Module46(nn.Cell):
    """One transformer encoder layer: attention + FFN, each with a
    residual connection followed by layer normalization.

    Args:
        linear3_0_matmul_0_w_shape: FFN up-projection weight shape.
        linear3_0_add_1_bias_shape: FFN up-projection bias shape.
        linear3_1_matmul_0_w_shape: FFN down-projection weight shape.
        linear3_1_add_1_bias_shape: FFN down-projection bias shape.
    """

    def __init__(self, linear3_0_matmul_0_w_shape, linear3_0_add_1_bias_shape, linear3_1_matmul_0_w_shape,
                 linear3_1_add_1_bias_shape):
        super(Module46, self).__init__()
        self.multiheadattn_0 = MultiHeadAttn()
        self.add_0 = P.Add()
        self.layernorm1_0 = LayerNorm1()
        self.linear3_0 = Linear3(matmul_0_w_shape=linear3_0_matmul_0_w_shape,
                                 add_1_bias_shape=linear3_0_add_1_bias_shape)
        self.gelu1_0 = GeLU1()
        self.linear3_1 = Linear3(matmul_0_w_shape=linear3_1_matmul_0_w_shape,
                                 add_1_bias_shape=linear3_1_add_1_bias_shape)
        self.add_1 = P.Add()
        self.layernorm1_1 = LayerNorm1()

    def construct(self, x, x0):
        # Attention sub-layer with residual + layer norm.
        attn_out = self.multiheadattn_0(x, x0)
        attn_norm = self.layernorm1_0(self.add_0(attn_out, x))
        # Feed-forward sub-layer with residual + layer norm.
        ffn_hidden = self.gelu1_0(self.linear3_0(attn_norm))
        ffn_out = self.linear3_1(ffn_hidden)
        return self.layernorm1_1(self.add_1(ffn_out, attn_norm))


class Module50(nn.Cell):
    """A stack of four identically-shaped encoder layers (hidden 768,
    intermediate 3072), applied sequentially with a shared attention bias.
    """

    def __init__(self):
        super(Module50, self).__init__()
        # All four layers share the same FFN shapes; attribute names are kept
        # as generated so parameter (checkpoint) keys stay stable.
        layer_shapes = dict(linear3_0_matmul_0_w_shape=(768, 3072),
                            linear3_0_add_1_bias_shape=(3072,),
                            linear3_1_matmul_0_w_shape=(3072, 768),
                            linear3_1_add_1_bias_shape=(768,))
        self.module46_0 = Module46(**layer_shapes)
        self.module46_1 = Module46(**layer_shapes)
        self.module46_2 = Module46(**layer_shapes)
        self.module46_3 = Module46(**layer_shapes)

    def construct(self, x, x0):
        hidden = self.module46_0(x, x0)
        hidden = self.module46_1(hidden, x0)
        hidden = self.module46_2(hidden, x0)
        hidden = self.module46_3(hidden, x0)
        return hidden


class Model(nn.Cell):
    """BERT-style encoder converted from ``bert_zh.onnx``.

    ``construct`` takes ``input_ids``, ``attention_mask`` and
    ``token_type_ids`` (each expected as (1, 512) per the conversion command)
    and returns the sequence output of the 12 encoder layers plus the pooled
    (dense + tanh over position 0) output.
    """

    def __init__(self):
        super(Model, self).__init__()
        # Ops that turn the 0/1 attention mask into an additive score bias.
        self.expanddims_0 = P.ExpandDims()
        self.expanddims_0_axis = 1
        self.expanddims_3 = P.ExpandDims()
        self.expanddims_3_axis = 2
        self.cast_5 = P.Cast()
        self.cast_5_to = mindspore.float32
        self.sub_7 = P.Sub()
        self.sub_7_bias = 1.0
        self.mul_9 = P.Mul()
        self.mul_9_w = -10000.0
        # Word-embedding table (vocab 21128) and token-type table (2 types).
        self.gather_1_input_weight = Parameter(Tensor(np.random.uniform(0, 1, (21128, 768)).astype(np.float32)),
                                               name=None)
        self.gather_1_axis = 0
        self.gather_1 = P.Gather()
        self.gather_2_input_weight = Parameter(Tensor(np.random.uniform(0, 1, (2, 768)).astype(np.float32)), name=None)
        self.gather_2_axis = 0
        self.gather_2 = P.Gather()
        self.add_4 = P.Add()
        self.add_6 = P.Add()
        # (1, 512, 768) additive table — presumably position embeddings.
        self.add_6_bias = Parameter(Tensor(np.random.uniform(0, 1, (1, 512, 768)).astype(np.float32)), name=None)
        self.layernorm1_0 = LayerNorm1()
        # Three stacks of four layers each = 12 encoder layers.
        self.module50_0 = Module50()
        self.module50_1 = Module50()
        self.module50_2 = Module50()
        # Pooler: take sequence position 0, then dense + tanh.
        self.gather_618_input_weight = Tensor(np.array(0))
        self.gather_618_axis = 1
        self.gather_618 = P.Gather()
        self.dense_619 = nn.Dense(in_channels=768, out_channels=768, has_bias=True)
        self.tanh_620 = nn.Tanh()

    def construct(self, input_ids, attention_mask, token_type_ids):
        # Additive attention bias: 0 where the mask is 1, -10000 where it is 0.
        mask = self.expanddims_0(attention_mask, self.expanddims_0_axis)
        mask = self.expanddims_3(mask, self.expanddims_3_axis)
        mask = self.cast_5(mask, self.cast_5_to)
        attn_bias = self.mul_9(self.sub_7(self.sub_7_bias, mask), self.mul_9_w)
        # Embedding lookup: word + token-type + learned additive table.
        word_emb = self.gather_1(self.gather_1_input_weight, input_ids, self.gather_1_axis)
        type_emb = self.gather_2(self.gather_2_input_weight, token_type_ids, self.gather_2_axis)
        embeddings = self.add_6(self.add_4(word_emb, type_emb), self.add_6_bias)
        hidden = self.layernorm1_0(embeddings)
        # 12 encoder layers.
        hidden = self.module50_0(hidden, attn_bias)
        hidden = self.module50_1(hidden, attn_bias)
        hidden = self.module50_2(hidden, attn_bias)
        # Pooled output from the first sequence position.
        first_token = self.gather_618(hidden, self.gather_618_input_weight, self.gather_618_axis)
        pooled = self.tanh_620(self.dense_619(first_token))
        return hidden, pooled
